Function to load data from non-HLA. Converts all columns to the appropriate types.

load_data <- function() {
  df<- read.csv("data/non_HLA.csv")
  df$Sex <- df$Sex %>% as.factor()
  df$Condition <- df$Condition %>% 
    as.factor()
  num_colnames<-c("AT1R.U.mL..CO.17.", "ENO1.Normalized..CO.4218.",
                  "FLRT2.Normalized..CO.688.", "VM.Normalized..CO.820.",
                  "TUBA1B.Normalized..CO.1987.", "IFIH1.Normalized..CO.3870.",
                  "PTPRN.Normalized..CO.3042.", "AURKA.Normalized..CO.4892.",
                  "PPIA.Normalized..CO.3292.", "EIF2A.Normalized..CO.6901.",
                  "PECR.Normalized..CO.4120.", "PRKCH.Normalized..CO.1048.",
                  "CXCL11.Normalized..CO.309.", "CXCL10.Normalized..CO.285.",
                  "AGRIN.Normalized..CO.504.", "GAPDH.Normalized..CO.508.",
                  "MYOSIN.Normalized..CO.9341.", "AGT.Normalized..CO.1641.",
                  "CHAF1B.Normalized..CO.11722.", "PLA2R.Normalized..CO.195.",
                  "LG3.Normalized..CO.3801.", "GSTT1.Normalized..CO.6136.",
                  "LMNA.Normalized..CO.6633.", "PRKCZ.Normalized..CO.9104.",
                  "LMNB1.Normalized..CO.2065.", "ARHGDIB.Normalized..CO.3918.",
                  "HNRNPK.Normalized..CO.845.", "IFNG.Normalized..CO.498.",
                  "REG3A.Normalized..CO.86.", "NUCLEOLIN.Normalized..CO.3669.",
                  "CD36.Normalized..CO.1591.", "TNFA.Normalized..CO.5331.",
                  "CXCL9.Normalized..CO.575.", "GDNF.Normalized..CO.1004.",
                  "COLLAGEN.I.Normalized..CO.375.", "COLLAGEN.II.Normalized..CO.155.",
                  "COLLAGEN.III.Normalized..CO.128.", "COLLAGEN.IV.Normalized..CO.71.",
                  "COLLAGEN.V.Normalized..CO.187.", "FIBRONECTIN.Normalized..CO.141.",
                  "Age.at.Transplant", "DSA_num")
  df <- df %>% mutate(across(all_of(num_colnames), ~as.numeric(as.character(.))))
  df$AGRIN.Normalized..CO.504. <- df$AGRIN.Normalized..CO.504./10
  df$AGT.Normalized..CO.1641. <- df$AGT.Normalized..CO.1641./100
  df$ARHGDIB.Normalized..CO.3918. <- df$ARHGDIB.Normalized..CO.3918./100
  df$AURKA.Normalized..CO.4892. <- df$AURKA.Normalized..CO.4892./100
  df$CD36.Normalized..CO.1591. <- df$CD36.Normalized..CO.1591./100
  df$CHAF1B.Normalized..CO.11722. <- df$CHAF1B.Normalized..CO.11722./100
  df$COLLAGEN.I.Normalized..CO.375. <- df$COLLAGEN.I.Normalized..CO.375./10
  df$COLLAGEN.II.Normalized..CO.155. <- df$COLLAGEN.II.Normalized..CO.155./10
  df$COLLAGEN.III.Normalized..CO.128. <- df$COLLAGEN.III.Normalized..CO.128./10
  df$COLLAGEN.IV.Normalized..CO.71. <- df$COLLAGEN.IV.Normalized..CO.71./10
  df$COLLAGEN.V.Normalized..CO.187. <- df$COLLAGEN.V.Normalized..CO.187./10
  df$CXCL10.Normalized..CO.285. <- df$CXCL10.Normalized..CO.285./10
  df$CXCL11.Normalized..CO.309. <- df$CXCL11.Normalized..CO.309./10
  df$CXCL9.Normalized..CO.575. <- df$CXCL9.Normalized..CO.575./100
  df$EIF2A.Normalized..CO.6901. <- df$EIF2A.Normalized..CO.6901./100
  df$ENO1.Normalized..CO.4218. <- df$ENO1.Normalized..CO.4218./100
  df$FIBRONECTIN.Normalized..CO.141. <- df$FIBRONECTIN.Normalized..CO.141./10
  df$FLRT2.Normalized..CO.688. <- df$FLRT2.Normalized..CO.688./100
  df$GAPDH.Normalized..CO.508. <- df$GAPDH.Normalized..CO.508./100
  df$GDNF.Normalized..CO.1004. <- df$GDNF.Normalized..CO.1004./100
  df$GSTT1.Normalized..CO.6136. <- df$GSTT1.Normalized..CO.6136./100
  df$HNRNPK.Normalized..CO.845. <- df$HNRNPK.Normalized..CO.845./100
  df$IFNG.Normalized..CO.498. <- df$IFNG.Normalized..CO.498./100
  df$IFIH1.Normalized..CO.3870. <- df$IFIH1.Normalized..CO.3870./100
  df$LG3.Normalized..CO.3801. <- df$LG3.Normalized..CO.3801./100
  df$LMNA.Normalized..CO.6633. <- df$LMNA.Normalized..CO.6633./100
  df$LMNB1.Normalized..CO.2065. <- df$LMNB1.Normalized..CO.2065./100
  df$MYOSIN.Normalized..CO.9341. <- df$MYOSIN.Normalized..CO.9341./100
  df$NUCLEOLIN.Normalized..CO.3669. <- df$NUCLEOLIN.Normalized..CO.3669./10
  df$PECR.Normalized..CO.4120. <- df$PECR.Normalized..CO.4120./100
  df$PLA2R.Normalized..CO.195. <- df$PLA2R.Normalized..CO.195./100
  df$PPIA.Normalized..CO.3292. <- df$PPIA.Normalized..CO.3292./100
  df$PRKCZ.Normalized..CO.9104. <- df$PRKCZ.Normalized..CO.9104./100
  df$PRKCH.Normalized..CO.1048. <- df$PRKCH.Normalized..CO.1048./100
  df$PTPRN.Normalized..CO.3042. <- df$PTPRN.Normalized..CO.3042./100
  df$REG3A.Normalized..CO.86. <- df$REG3A.Normalized..CO.86./10
  df$TNFA.Normalized..CO.5331. <- df$TNFA.Normalized..CO.5331./100
  df$TUBA1B.Normalized..CO.1987. <- df$TUBA1B.Normalized..CO.1987./100
  df$VM.Normalized..CO.820. <- df$VM.Normalized..CO.820./100
  return(df)
}

Function to replace normalized values with “1” for above cutoff and “0” for below. Cutoff numbers are in the column names.

process_columns_with_cutoff <- function(colnames_vector, df) { 
  # Loop through each column name provided in the vector 
  for (colname in colnames_vector) {
    cutoff_str <- sub(".*CO\\.", "", colname)
    cutoff_str <- gsub("\\..*", "", cutoff_str)
    cutoff <- as.numeric(cutoff_str)
    df[[colname]] <- ifelse(df[[colname]] > cutoff, 1, 0)
    #df[[colname]] <- as.factor(df[[colname]])
  }
  return(df)
}

Function to plot distributions of continuous variables.

plot_variable_distributions <- function(data, variable_names, outcome_var) { 
  # Pivot the data frame to long format 
  data_long <- pivot_longer(data, cols = all_of(variable_names), names_to = "variable", values_to = "value")

  # Filter out rows with NA values in 'value' to avoid issues in plotting
  data_long <- na.omit(data_long)
  
  # Plot data with aes() adjusted to include color based on the outcome variable
  p <- ggplot(data_long, aes(x = value, fill = as.factor(get(outcome_var)), color = as.factor(get(outcome_var)))) +
    geom_density(alpha = 0.3) +  # Adjust alpha for fill transparency
    facet_wrap(~variable, scales = "free", nrow = 20) +
    scale_fill_brewer(palette = "Set1", name = outcome_var) +
    scale_color_brewer(palette = "Set1", name = outcome_var) +  # Add this line to use consistent color palette for lines
    theme_minimal() +
    theme(legend.position = "bottom") +
    labs(x = "Value", y = "Density") 
  
  # Alter text for readability
  p <- p + theme(strip.text = element_text(size = 8),  # Text size
                 strip.text.x = element_text(angle = 0, hjust = 0.5),  # Adjust angle and horizontal justification
                 strip.background = element_rect(fill = "white"))  # Change background color for visibility
  print(p)
  ggsave("variable_distributions.svg")
}

Function that returns subsetted data frame based on the column names provided

create_subset_df <- function(original_df, colnames_array) {
  # Ensure that the column names exist in the original dataframe 
  valid_colnames <- colnames_array[colnames_array %in% names(original_df)]

  subset_df <- original_df[, valid_colnames, drop = FALSE]
  return(subset_df)
}

Return all highly correlated variables

find_highly_correlated_variables <- function(dataframe, column_names, threshold = 0.8) {
  subset_df <- dataframe[, column_names]
  corr_matrix <- cor(subset_df, use = "complete.obs")  # 'complete.obs' handles missing values by using complete cases
  high_corr_pairs <- which(abs(corr_matrix) > threshold & lower.tri(corr_matrix), arr.ind = TRUE)
  
  # Extract the variable names for these pairs
  high_corr_vars <- unique(c(column_names[high_corr_pairs[, "row"]], column_names[high_corr_pairs[, "col"]]))
  
  return(high_corr_vars)
}

Write formulas for the multinomial logistic regression

create_pom_formula <- function(response_var, clin_cofactors, independent_vars, df) {
  all_vars <- c(response_var, clin_cofactors, independent_vars) 
  missing_vars <- setdiff(all_vars, names(df))
  
  if(length(missing_vars) > 0) { stop(paste("The following variables are missing in the dataframe:",
                                             paste(missing_vars, collapse=", "), ".")) 
    }
  predictors <- c(clin_cofactors, independent_vars)
  formula_str <- paste(response_var, "~", paste(predictors, collapse = " + "))
  return(as.formula(formula_str))
}

Function to create a PCA column

addPCAComponent <- function(dataframe, columns, pcaColumnName) {
  subset_df <- dataframe[, columns, drop = FALSE]
  pca_result <- prcomp(subset_df, center = TRUE, scale. = TRUE)
  
  # Extract the first principal component
  first_pca_component <- pca_result$x[, 1]
  dataframe[[pcaColumnName]] <- first_pca_component
  return(dataframe)
}

Saving the names of the independent variables and the continuous independent variables

#missing fibronectin and pla2r because they have low levels of fluorescence => inaccurate OR.
tot_nHLA_var<-c("AT1R.U.mL..CO.17.",
"ENO1.Normalized..CO.4218.", "FLRT2.Normalized..CO.688.",
"VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.",
"IFIH1.Normalized..CO.3870.", "PTPRN.Normalized..CO.3042.",
"AURKA.Normalized..CO.4892.", "PPIA.Normalized..CO.3292.",
"EIF2A.Normalized..CO.6901.", "PECR.Normalized..CO.4120.",
"PRKCH.Normalized..CO.1048.", "CXCL11.Normalized..CO.309.",
"CXCL10.Normalized..CO.285.", "AGRIN.Normalized..CO.504.",
"GAPDH.Normalized..CO.508.", "MYOSIN.Normalized..CO.9341.",
"AGT.Normalized..CO.1641.", "CHAF1B.Normalized..CO.11722.",
"LG3.Normalized..CO.3801.", "GSTT1.Normalized..CO.6136.", "LMNA.Normalized..CO.6633.",
"PRKCZ.Normalized..CO.9104.", "LMNB1.Normalized..CO.2065.",
"ARHGDIB.Normalized..CO.3918.", "HNRNPK.Normalized..CO.845.",
"IFNG.Normalized..CO.498.", "REG3A.Normalized..CO.86.",
"NUCLEOLIN.Normalized..CO.3669.", "CD36.Normalized..CO.1591.",
"TNFA.Normalized..CO.5331.", "CXCL9.Normalized..CO.575.",
"GDNF.Normalized..CO.1004.", "COLLAGEN.I.Normalized..CO.375.",
"COLLAGEN.II.Normalized..CO.155.", "COLLAGEN.III.Normalized..CO.128.",
"COLLAGEN.IV.Normalized..CO.71.", "COLLAGEN.V.Normalized..CO.187.")

tot_nHLA_var2<-c("AT1R.U.mL..CO.17.",
"ENO1.Normalized..CO.4218.", "FLRT2.Normalized..CO.688.",
"VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.",
"IFIH1.Normalized..CO.3870.", "PTPRN.Normalized..CO.3042.",
"AURKA.Normalized..CO.4892.", "PPIA.Normalized..CO.3292.",
"EIF2A.Normalized..CO.6901.", "PECR.Normalized..CO.4120.",
"PRKCH.Normalized..CO.1048.", "CXCL11.Normalized..CO.309.",
"CXCL10.Normalized..CO.285.", "AGRIN.Normalized..CO.504.",
"GAPDH.Normalized..CO.508.", "MYOSIN.Normalized..CO.9341.",
"AGT.Normalized..CO.1641.", "CHAF1B.Normalized..CO.11722.",
"PLA2R.Normalized..CO.195.", "LG3.Normalized..CO.3801.",
"GSTT1.Normalized..CO.6136.", "LMNA.Normalized..CO.6633.",
"PRKCZ.Normalized..CO.9104.", "LMNB1.Normalized..CO.2065.",
"ARHGDIB.Normalized..CO.3918.", "HNRNPK.Normalized..CO.845.",
"IFNG.Normalized..CO.498.", "REG3A.Normalized..CO.86.",
"NUCLEOLIN.Normalized..CO.3669.", "CD36.Normalized..CO.1591.",
"TNFA.Normalized..CO.5331.", "CXCL9.Normalized..CO.575.",
"GDNF.Normalized..CO.1004.", "COLLAGEN.I.Normalized..CO.375.",
"COLLAGEN.II.Normalized..CO.155.", "COLLAGEN.III.Normalized..CO.128.",
"COLLAGEN.IV.Normalized..CO.71.", "COLLAGEN.V.Normalized..CO.187.",
"FIBRONECTIN.Normalized..CO.141.")

tot_clin_var<-c("Age.at.Transplant", "DSA_num")

tot_independant_var_cont<-c(tot_nHLA_var, tot_clin_var)

nc_var<-c("AT1R.U.mL..CO.17.", "ENO1.Normalized..CO.4218.",
"FLRT2.Normalized..CO.688.", "IFIH1.Normalized..CO.3870.",
"AURKA.Normalized..CO.4892.", "PPIA.Normalized..CO.3292.",
"EIF2A.Normalized..CO.6901.", "PECR.Normalized..CO.4120.",
"PRKCH.Normalized..CO.1048.", "CXCL11.Normalized..CO.309.",
"CXCL10.Normalized..CO.285.", "AGRIN.Normalized..CO.504.",
"GAPDH.Normalized..CO.508.", "MYOSIN.Normalized..CO.9341.",
"CHAF1B.Normalized..CO.11722.", "PLA2R.Normalized..CO.195.",
"LG3.Normalized..CO.3801.", "GSTT1.Normalized..CO.6136.",
"LMNA.Normalized..CO.6633.", "PRKCZ.Normalized..CO.9104.",
"LMNB1.Normalized..CO.2065.", "ARHGDIB.Normalized..CO.3918.",
"HNRNPK.Normalized..CO.845.", "IFNG.Normalized..CO.498.",
"REG3A.Normalized..CO.86.", "NUCLEOLIN.Normalized..CO.3669.",
"TNFA.Normalized..CO.5331.", "CXCL9.Normalized..CO.575.",
"GDNF.Normalized..CO.1004.", "COLLAGEN.I.Normalized..CO.375.",
"COLLAGEN.II.Normalized..CO.155.", "COLLAGEN.III.Normalized..CO.128.",
"COLLAGEN.IV.Normalized..CO.71.", "COLLAGEN.V.Normalized..CO.187.",
"FIBRONECTIN.Normalized..CO.141.")

Replace normalized fluorescence values with “1”, presence or “0” absence.

df_nocut<- load_data()
#not doing because screws analysis
#df<-process_columns_with_cutoff(tot_nHLA_var, df_nocut)
df<-df_nocut

Plot the distributions of continuous variables

plot_variable_distributions(df_nocut, tot_independant_var_cont, "Condition")

Subset into pre and post transplant data

df_pre<- subset(df, Pre==1)
df_post<- subset(df, Pre==0)

Correlation plot for pre-transplant data

df_pre_nc <- df_nocut %>% filter(Pre==1)
create_subset_df(df_pre_nc, tot_independant_var_cont) %>% cor() %>% corrplot()

find_highly_correlated_variables(df_pre_nc, tot_independant_var_cont, 0.8)
## [1] "VM.Normalized..CO.820."           "TUBA1B.Normalized..CO.1987."     
## [3] "AGT.Normalized..CO.1641."         "CD36.Normalized..CO.1591."       
## [5] "PPIA.Normalized..CO.3292."        "COLLAGEN.III.Normalized..CO.128."
## [7] "COLLAGEN.IV.Normalized..CO.71."   "FLRT2.Normalized..CO.688."       
## [9] "COLLAGEN.I.Normalized..CO.375."

Correlation plot for post-transplant data

df_post_nc <- df_nocut %>% filter(Pre==0)
create_subset_df(df_post_nc, tot_independant_var_cont) %>% cor() %>% corrplot()

find_highly_correlated_variables(df_post_nc, tot_independant_var_cont, 0.8)
## [1] "TUBA1B.Normalized..CO.1987."      "CD36.Normalized..CO.1591."       
## [3] "PPIA.Normalized..CO.3292."        "AGT.Normalized..CO.1641."        
## [5] "COLLAGEN.III.Normalized..CO.128." "FLRT2.Normalized..CO.688."       
## [7] "VM.Normalized..CO.820."           "COLLAGEN.I.Normalized..CO.375."

variables that are colinear are replaced with a PCA

all_colin<-c("VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.", "AGT.Normalized..CO.1641.", "CD36.Normalized..CO.1591.", "PPIA.Normalized..CO.3292.", "COLLAGEN.III.Normalized..CO.128.", "COLLAGEN.IV.Normalized..CO.71.", "FLRT2.Normalized..CO.688.", "COLLAGEN.I.Normalized..CO.375.")

colin_set1<-c("VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.", "AGT.Normalized..CO.1641.", "CD36.Normalized..CO.1591.", "PPIA.Normalized..CO.3292.", "FLRT2.Normalized..CO.688.")
colin_set2<-all_colin[!all_colin %in% colin_set1]

df_pre<-addPCAComponent(df_pre, colin_set1, "PCA1")
df_post<-addPCAComponent(df_post, colin_set1, "PCA1")

df_pre<-addPCAComponent(df_pre, colin_set2, "PCA2")
df_post<-addPCAComponent(df_post, colin_set2, "PCA2")

df_pre<-df_pre %>% dplyr::select(-all_of(all_colin))
df_post<-df_post %>% dplyr::select(-all_of(all_colin))

tot_nHLA_var<-tot_nHLA_var[!tot_nHLA_var %in% all_colin]
tot_nHLA_var<-c(tot_nHLA_var, "PCA1", "PCA2")
tot_independant_var_cont<-tot_independant_var_cont[!tot_independant_var_cont %in% all_colin]
tot_independant_var_cont<-c(tot_independant_var_cont, "PCA1", "PCA2")

Function to remove Columns in which there is only one instance (only presence or only absence) as this does not help with prediction.

find_factor_columns_with_missing_level <- function(dataframe, columns) {
  # Initialize an empty vector to store the names of columns that meet the criteria
  columns_with_missing_level <- c()
  
  for (col_name in columns) {
    if (is.factor(dataframe[[col_name]]) && length(levels(dataframe[[col_name]])) == 2) {
      level_counts <- table(dataframe[[col_name]])
      
      # Check if any of the levels has no instances
      if (any(level_counts == 0)) {
        columns_with_missing_level <- c(columns_with_missing_level, col_name)
      }
    }
  }
  return(columns_with_missing_level)
}

Create datasets for DGF and failure analysis

df_pre_dgf<- subset(df_pre, !Condition==2)
df_pre_dgf$Condition <- factor(df_pre_dgf$Condition)

df_post_dgf<-subset(df_post, !Condition==2)
df_post_dgf$Condition <- factor(df_post_dgf$Condition)

df_pre_fail<-subset(df_pre, !Condition==1)
df_pre_fail$Condition <- factor(df_pre_fail$Condition)

df_post_fail<-subset(df_post, !Condition==1)
df_post_fail$Condition <- factor(df_post_fail$Condition)

Multinomial logistic regression for pre-transplant data

pre_dgf_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_pre)
pre_dgf_model<-glm(pre_dgf_formula, family = "binomial",data = df_pre_dgf)
#pre_dgf_step.model<-stepAIC(pre_dgf_model, direction = "backward", trace = FALSE)
#optireg_pre_dgf<-pre_dgf_step.model$terms
#pre_dgf_model_opt<-glm(optireg_pre_dgf, family = "binomial",data = df_pre_dgf)
summary(pre_dgf_model)
## 
## Call:
## glm(formula = pre_dgf_formula, family = "binomial", data = df_pre_dgf)
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                     -1.2830970  1.9045903  -0.674   0.5005  
## Age.at.Transplant                0.0428024  0.0336888   1.271   0.2039  
## DSA_num                          1.0914282  2.1455947   0.509   0.6110  
## AT1R.U.mL..CO.17.               -0.0148840  0.0738407  -0.202   0.8403  
## ENO1.Normalized..CO.4218.       -0.0003579  0.0389132  -0.009   0.9927  
## IFIH1.Normalized..CO.3870.      -0.0046864  0.0182464  -0.257   0.7973  
## PTPRN.Normalized..CO.3042.      -0.0243427  0.0162702  -1.496   0.1346  
## AURKA.Normalized..CO.4892.       0.0626144  0.0313737   1.996   0.0460 *
## EIF2A.Normalized..CO.6901.       0.0061056  0.0183817   0.332   0.7398  
## PECR.Normalized..CO.4120.        0.0833783  0.0541459   1.540   0.1236  
## PRKCH.Normalized..CO.1048.      -0.0433572  0.0383565  -1.130   0.2583  
## CXCL11.Normalized..CO.309.      -0.0161343  0.0195859  -0.824   0.4101  
## CXCL10.Normalized..CO.285.      -0.0109939  0.0333204  -0.330   0.7414  
## AGRIN.Normalized..CO.504.       -0.0015165  0.0069283  -0.219   0.8267  
## GAPDH.Normalized..CO.508.        0.0011635  0.0253096   0.046   0.9633  
## MYOSIN.Normalized..CO.9341.     -0.0336616  0.0206109  -1.633   0.1024  
## CHAF1B.Normalized..CO.11722.    -0.0305358  0.0203113  -1.503   0.1327  
## LG3.Normalized..CO.3801.         0.1058020  0.0473588   2.234   0.0255 *
## GSTT1.Normalized..CO.6136.      -0.0367313  0.0209455  -1.754   0.0795 .
## LMNA.Normalized..CO.6633.        0.0520271  0.0376736   1.381   0.1673  
## PRKCZ.Normalized..CO.9104.      -0.0082422  0.0161995  -0.509   0.6109  
## LMNB1.Normalized..CO.2065.      -0.0081659  0.0385510  -0.212   0.8322  
## ARHGDIB.Normalized..CO.3918.     0.0297790  0.0237262   1.255   0.2094  
## HNRNPK.Normalized..CO.845.      -0.0385209  0.0638070  -0.604   0.5460  
## IFNG.Normalized..CO.498.         0.0571146  0.0513555   1.112   0.2661  
## REG3A.Normalized..CO.86.         0.0052798  0.0232114   0.227   0.8201  
## NUCLEOLIN.Normalized..CO.3669.  -0.0376766  0.0172336  -2.186   0.0288 *
## TNFA.Normalized..CO.5331.        0.1892495  0.0950230   1.992   0.0464 *
## CXCL9.Normalized..CO.575.        0.3223528  0.1694657   1.902   0.0571 .
## GDNF.Normalized..CO.1004.        0.0552137  0.0987410   0.559   0.5760  
## COLLAGEN.II.Normalized..CO.155. -0.0395747  0.1083155  -0.365   0.7148  
## COLLAGEN.V.Normalized..CO.187.  -0.2499026  0.1373879  -1.819   0.0689 .
## PCA1                            -0.6399012  0.4976879  -1.286   0.1985  
## PCA2                             0.3619538  0.7372108   0.491   0.6234  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 116.227  on 85  degrees of freedom
## Residual deviance:  71.231  on 52  degrees of freedom
## AIC: 139.23
## 
## Number of Fisher Scoring iterations: 7
pre_fail_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_pre)
pre_fail_model<-glm(pre_fail_formula, family = "binomial",data = df_pre_fail)
#pre_fail_step.model<-stepAIC(pre_fail_model, direction = "backward", trace = FALSE)
#optireg_pre_fail<-pre_fail_step.model$terms
#pre_fail_model_opt<-glm(optireg_pre_fail, family = "binomial",data = df_pre_fail)
summary(pre_fail_model)
## 
## Call:
## glm(formula = pre_fail_formula, family = "binomial", data = df_pre_fail)
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                      2.916674   1.228937   2.373  0.01763 * 
## Age.at.Transplant               -0.039896   0.017205  -2.319  0.02040 * 
## DSA_num                          0.791697   0.580884   1.363  0.17291   
## AT1R.U.mL..CO.17.                0.070367   0.034358   2.048  0.04055 * 
## ENO1.Normalized..CO.4218.        0.008691   0.021780   0.399  0.68985   
## IFIH1.Normalized..CO.3870.      -0.013321   0.014500  -0.919  0.35825   
## PTPRN.Normalized..CO.3042.      -0.017953   0.010989  -1.634  0.10230   
## AURKA.Normalized..CO.4892.       0.023580   0.023897   0.987  0.32379   
## EIF2A.Normalized..CO.6901.       0.004931   0.009506   0.519  0.60394   
## PECR.Normalized..CO.4120.        0.031161   0.028303   1.101  0.27091   
## PRKCH.Normalized..CO.1048.      -0.017382   0.015724  -1.105  0.26894   
## CXCL11.Normalized..CO.309.      -0.009347   0.004487  -2.083  0.03722 * 
## CXCL10.Normalized..CO.285.       0.017628   0.014946   1.179  0.23823   
## AGRIN.Normalized..CO.504.        0.002586   0.003396   0.762  0.44634   
## GAPDH.Normalized..CO.508.       -0.050911   0.026907  -1.892  0.05847 . 
## MYOSIN.Normalized..CO.9341.     -0.015287   0.010465  -1.461  0.14407   
## CHAF1B.Normalized..CO.11722.     0.004292   0.012760   0.336  0.73660   
## LG3.Normalized..CO.3801.         0.067364   0.027074   2.488  0.01284 * 
## GSTT1.Normalized..CO.6136.      -0.008830   0.010595  -0.833  0.40462   
## LMNA.Normalized..CO.6633.       -0.003124   0.019711  -0.158  0.87409   
## PRKCZ.Normalized..CO.9104.      -0.004527   0.010133  -0.447  0.65503   
## LMNB1.Normalized..CO.2065.      -0.041961   0.037736  -1.112  0.26615   
## ARHGDIB.Normalized..CO.3918.    -0.021202   0.016216  -1.307  0.19105   
## HNRNPK.Normalized..CO.845.      -0.022611   0.033465  -0.676  0.49925   
## IFNG.Normalized..CO.498.         0.026272   0.021535   1.220  0.22249   
## REG3A.Normalized..CO.86.         0.011244   0.008161   1.378  0.16829   
## NUCLEOLIN.Normalized..CO.3669.  -0.021701   0.008362  -2.595  0.00945 **
## TNFA.Normalized..CO.5331.        0.086481   0.056294   1.536  0.12448   
## CXCL9.Normalized..CO.575.        0.045367   0.084316   0.538  0.59054   
## GDNF.Normalized..CO.1004.        0.011658   0.050795   0.230  0.81848   
## COLLAGEN.II.Normalized..CO.155.  0.081847   0.051395   1.592  0.11128   
## COLLAGEN.V.Normalized..CO.187.  -0.097263   0.059283  -1.641  0.10087   
## PCA1                            -0.165889   0.336398  -0.493  0.62192   
## PCA2                            -0.352706   0.359910  -0.980  0.32709   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 194.77  on 152  degrees of freedom
## Residual deviance: 126.84  on 119  degrees of freedom
## AIC: 194.84
## 
## Number of Fisher Scoring iterations: 8

Multinomial logistic regression for post-transplant data

post_dgf_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_post)
post_dgf_model<-glm(post_dgf_formula, family = "binomial",data = df_post_dgf)
#post_dgf_step.model<-stepAIC(post_dgf_model, direction = "backward", trace = FALSE)
#optireg_post_dgf<-post_dgf_step.model$terms
#post_dgf_model_opt<-glm(optireg_post_dgf, family = "binomial",data = df_post_dgf)
summary(post_dgf_model)
## 
## Call:
## glm(formula = post_dgf_formula, family = "binomial", data = df_post_dgf)
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                     -1.9365147  2.0445183  -0.947   0.3436  
## Age.at.Transplant                0.0449142  0.0317984   1.412   0.1578  
## DSA_num                          2.1436691  2.0486757   1.046   0.2954  
## AT1R.U.mL..CO.17.               -0.0467240  0.0545789  -0.856   0.3920  
## ENO1.Normalized..CO.4218.        0.0431505  0.0533377   0.809   0.4185  
## IFIH1.Normalized..CO.3870.      -0.0004939  0.0250485  -0.020   0.9843  
## PTPRN.Normalized..CO.3042.      -0.0263981  0.0238215  -1.108   0.2678  
## AURKA.Normalized..CO.4892.       0.0774523  0.0322191   2.404   0.0162 *
## EIF2A.Normalized..CO.6901.       0.0089806  0.0194839   0.461   0.6449  
## PECR.Normalized..CO.4120.        0.1263991  0.0684166   1.847   0.0647 .
## PRKCH.Normalized..CO.1048.      -0.0591217  0.0659747  -0.896   0.3702  
## CXCL11.Normalized..CO.309.      -0.0280399  0.0406095  -0.690   0.4899  
## CXCL10.Normalized..CO.285.      -0.0299910  0.0507300  -0.591   0.5544  
## AGRIN.Normalized..CO.504.       -0.0321584  0.0185673  -1.732   0.0833 .
## GAPDH.Normalized..CO.508.       -0.0621826  0.0427140  -1.456   0.1455  
## MYOSIN.Normalized..CO.9341.     -0.0510456  0.0246392  -2.072   0.0383 *
## CHAF1B.Normalized..CO.11722.    -0.0360798  0.0261617  -1.379   0.1679  
## LG3.Normalized..CO.3801.         0.1121599  0.0744646   1.506   0.1320  
## GSTT1.Normalized..CO.6136.      -0.0414309  0.0243092  -1.704   0.0883 .
## LMNA.Normalized..CO.6633.       -0.0345304  0.0586055  -0.589   0.5557  
## PRKCZ.Normalized..CO.9104.      -0.0232599  0.0175539  -1.325   0.1852  
## LMNB1.Normalized..CO.2065.       0.0775619  0.0663910   1.168   0.2427  
## ARHGDIB.Normalized..CO.3918.     0.0177367  0.0272987   0.650   0.5159  
## HNRNPK.Normalized..CO.845.      -0.0590927  0.0823071  -0.718   0.4728  
## IFNG.Normalized..CO.498.         0.0597575  0.0480484   1.244   0.2136  
## REG3A.Normalized..CO.86.         0.0442035  0.0445267   0.993   0.3208  
## NUCLEOLIN.Normalized..CO.3669.  -0.0135224  0.0121236  -1.115   0.2647  
## TNFA.Normalized..CO.5331.        0.1745214  0.1783264   0.979   0.3277  
## CXCL9.Normalized..CO.575.        0.1897164  0.2068158   0.917   0.3590  
## GDNF.Normalized..CO.1004.        0.1629479  0.0905470   1.800   0.0719 .
## COLLAGEN.II.Normalized..CO.155.  0.0390213  0.1743071   0.224   0.8229  
## COLLAGEN.V.Normalized..CO.187.   0.0401761  0.0817565   0.491   0.6231  
## PCA1                            -0.8350594  0.5711057  -1.462   0.1437  
## PCA2                            -0.1234494  0.3200662  -0.386   0.6997  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 116.227  on 85  degrees of freedom
## Residual deviance:  69.788  on 52  degrees of freedom
## AIC: 137.79
## 
## Number of Fisher Scoring iterations: 8
post_fail_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_post)
post_fail_model<-glm(post_fail_formula, family = "binomial",data = df_post_fail)
#post_fail_step.model<-stepAIC(post_fail_model, direction = "backward", trace = FALSE)
#optireg_post_fail<-post_fail_step.model$terms
#post_fail_model_opt<-glm(optireg_post_fail, family = "binomial",data = df_post_fail)
summary(post_fail_model)
## 
## Call:
## glm(formula = post_fail_formula, family = "binomial", data = df_post_fail)
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                      1.830994   1.125440   1.627  0.10376   
## Age.at.Transplant               -0.042412   0.017361  -2.443  0.01457 * 
## DSA_num                          2.169908   0.670093   3.238  0.00120 **
## AT1R.U.mL..CO.17.                0.025368   0.031583   0.803  0.42184   
## ENO1.Normalized..CO.4218.       -0.005129   0.021416  -0.240  0.81071   
## IFIH1.Normalized..CO.3870.      -0.045777   0.029042  -1.576  0.11497   
## PTPRN.Normalized..CO.3042.      -0.009943   0.012812  -0.776  0.43774   
## AURKA.Normalized..CO.4892.      -0.008551   0.022032  -0.388  0.69793   
## EIF2A.Normalized..CO.6901.       0.017606   0.012100   1.455  0.14565   
## PECR.Normalized..CO.4120.       -0.005563   0.041362  -0.134  0.89302   
## PRKCH.Normalized..CO.1048.       0.029017   0.022836   1.271  0.20386   
## CXCL11.Normalized..CO.309.      -0.010330   0.010111  -1.022  0.30692   
## CXCL10.Normalized..CO.285.      -0.016401   0.014396  -1.139  0.25457   
## AGRIN.Normalized..CO.504.       -0.003696   0.005282  -0.700  0.48410   
## GAPDH.Normalized..CO.508.        0.005447   0.035031   0.155  0.87643   
## MYOSIN.Normalized..CO.9341.     -0.039516   0.015117  -2.614  0.00895 **
## CHAF1B.Normalized..CO.11722.     0.017552   0.014781   1.187  0.23504   
## LG3.Normalized..CO.3801.         0.161777   0.059839   2.704  0.00686 **
## GSTT1.Normalized..CO.6136.       0.012148   0.012134   1.001  0.31677   
## LMNA.Normalized..CO.6633.        0.010705   0.026859   0.399  0.69022   
## PRKCZ.Normalized..CO.9104.       0.008211   0.012223   0.672  0.50174   
## LMNB1.Normalized..CO.2065.      -0.020385   0.043703  -0.466  0.64089   
## ARHGDIB.Normalized..CO.3918.    -0.039147   0.025609  -1.529  0.12636   
## HNRNPK.Normalized..CO.845.      -0.097622   0.048342  -2.019  0.04344 * 
## IFNG.Normalized..CO.498.         0.045920   0.028172   1.630  0.10310   
## REG3A.Normalized..CO.86.         0.005473   0.015182   0.360  0.71849   
## NUCLEOLIN.Normalized..CO.3669.  -0.013220   0.008047  -1.643  0.10044   
## TNFA.Normalized..CO.5331.        0.044417   0.065844   0.675  0.49994   
## CXCL9.Normalized..CO.575.        0.143552   0.115775   1.240  0.21500   
## GDNF.Normalized..CO.1004.        0.099958   0.112486   0.889  0.37420   
## COLLAGEN.II.Normalized..CO.155.  0.127697   0.087526   1.459  0.14457   
## COLLAGEN.V.Normalized..CO.187.   0.010814   0.081212   0.133  0.89407   
## PCA1                             0.038354   0.266146   0.144  0.88541   
## PCA2                            -0.273048   0.207891  -1.313  0.18904   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 194.77  on 152  degrees of freedom
## Residual deviance: 109.88  on 119  degrees of freedom
## AIC: 177.88
## 
## Number of Fisher Scoring iterations: 8

Plotting the Odds Ratios

library(forestmodel)

to_rm<-c("COLLAGEN.V.Normalized..CO.187.", "PCA1", "PCA2", "FIBRONECTIN.Normalized..CO.141.", "PLA2R.Normalized..CO.195.", "DSA_num", "Age.at.Transplant")

to_include<-tot_independant_var_cont[!tot_independant_var_cont %in% to_rm]
f_pre<-forest_model(model_list=list("Delayed Graft function"=pre_dgf_model, "Organ Rejection"=pre_fail_model), covariates = to_include, format_options = forest_model_format_options(point_size = 2))
f_pre

ggsave("pre_trans_plot.svg")

f_post<-forest_model(model_list=list("Delayed Graft function"=post_dgf_model, "Organ Rejection"=post_fail_model), covariates = to_include, format_options = forest_model_format_options(point_size =2))
f_post

ggsave("post_trans_plot.svg")

```{}